********************************************************************************

* Filename: Table_5_TeachingPracticeImpacts.do

********************************************************************************
*  This do-file is part of the collection of replication files for Schaffner, Glewwe, and Sharma, 
*  "Why Programs Fail:  Lessons for Improving Public Service Quality from a Mixed Methods Evaluation
*   of an Unsuccessful Teacher Training Program in Nepal"
*
*This do-file calculates:
*      - all results in Table 5 of the WBER journal article
*	   - all results in Tables S4 and S5 of the Supplementary Online Appendix
*	   - all multiple hypothesis testing correction results related to those tables
*     
* Software version used: STATA/SE 18.0
* 
* In the case of IV/LATE estimates, the do-file implements adjustments to standard errors and
* confidence intervals, and assesses statistical significance, following:
*		Lee, David S., Justin McCrary, Marcelo J. Moreira, and Jack Porter, 2022,
*		"Valid t-Ratio Inference for IV" American Economic Review 112(10):3260-3290.
*
* Making use of adjustments in the Lee et al. paper, we also bound the p-values for the 
* tests of significant impact in IV estimation.  We use these p-value bounds in
* multiple hypothesis testing corrections for the evaluation of statistical significance.
*
********************************************************************************
*GLOBAL FILE PATH DEFINITIONS
	* Forward slashes are used as the directory separator because Stata accepts
	* them on Windows, macOS, and Linux; a backslash separator only works on Windows.
	* PBRfolder must be edited by the user to point at the main replication folder.
	global PBRfolder "ADD FILE REFERENCE TO MAIN FOLDER HERE" 
	global datasets "$PBRfolder/Datasets"
	global logs "$PBRfolder/Logs"

********************************************************************************
* SET-UP
	set more off
	clear all
	capture log close   // closes a log left open by an earlier run; ignored if none is open
	log using "$logs/Table_5_teaching_practice_impacts", replace
	cd "$datasets"      // all datasets below are referenced relative to this folder

****************************************************************************

****PREPARE SCHOOL-LEVEL DATA WITH TREATMENT ARM AND SURVEY DESIGN VARIABLES

****************************************************************************

	* Load the school-level file that carries study arm and survey design variables
	use basicdata, clear

	* Stratum identifier combining district and stratum: district*10 + stratum
	generate dist_stratum = 10*district + stratum

	* Treatment indicator: 1 when studyarm is 1 or 2, 0 for any other value
	generate treat = inlist(studyarm, 1, 2)
	tabulate studyarm treat

	* Declare the survey design: schools are the PSU, sch_wght the sampling weight,
	* dist_stratum the strata
	svyset schoolid [pweight=sch_wght], strata(dist_stratum)

	* Keep a copy for merging into the student-level data later in this do-file
	tempfile sch_temp
	save "`sch_temp'", replace

	
****************************************************************************

****DEFINE PROGRAM FOR IMPLEMENTING tF CORRECTIONS AFTER IV/LATE ESTIMATION

****************************************************************************

	capture program drop tFcorrections

	program tFcorrections
	
		/* PURPOSE
		   Speeds up the "by hand" tF adjustments of Lee, McCrary, Moreira, and Porter (2022)
		   after an IV/LATE estimation.  It does not fully automate the corrections: the user
		   looks up the bracketing F values and adjustment factors in Tables 3A (5%) and 3B (1%)
		   of that paper and supplies them as scalars.

		   INPUTS that must exist before the program is called:
			scalar FF =  			  	// F for test of signif of treat in first stage regression 
			scalar F5_below = 			// F in table 3a just below FF 
			scalar F5_above = 			// F in table 3a just above FF
			scalar A5_below = 			// adjustment factor in 3a for F5below
			scalar A5_above = 			// adjustment factor in 3a for F5above
			scalar F1_below = 			// F in table 3b just below FF
			scalar F1_above = 			// F in table 3b just above FF
			scalar A1_below =			// adjustment factor in 3b for F1below
			scalar A1_above =			// adjustment factor in 3b for F1above
			global reg_command "`e(cmdline)'"  // the relevant ivregress command, captured right after it was run

		   The estimation in $reg_command must contain a coefficient named "treated".
		   Rows 5 and 6 of r(table) are read from column 1, so this program assumes that
		   "treated" is the first coefficient reported by that command.

		   Callers in this do-file also create the variables in_sample and testscore before
		   the call; the program itself does not read them.

		   OUTPUTS (all left behind as scalars for the caller):
			Adjust5 Adjust1 Adjust10          adjustment factors
			se_adj5 se_adj1 se_adj10          adjusted standard errors
			lower5 upper5                     unadjusted 95% confidence bounds
			lower5_adj5 upper5_adj5           adjusted 95% confidence bounds
			lower1_adj1 upper1_adj1           adjusted 99% confidence bounds
			lower10_adj10 upper10_adj10       adjusted 90% confidence bounds (conservative)
			signif10 signif5 signif1 Nstars   significance flags and their sum
			raw_pval p_noadj p_adj            unadjusted and adjusted p-values
		   (raw_pval is computed inside the program from "test treated"; any value set by
		   the caller is overwritten.)
		*/

		* Guard: a missing first-stage F would otherwise be treated as "no adjustment needed",
		* because missing compares as larger than any number.
			if missing(scalar(FF)) {
				display as error "tFcorrections: scalar FF (first-stage F) is missing"
				exit 198
			}
			
		* Determine whether adjustments are needed
			scalar NeedAdjust5 = FF < 104.67
			scalar NeedAdjust1 = FF<= 252.342
			
							
		* Calc 5% adjusted std error, confidence interval, and statis significance
		
			* Determine adjustment factor by linear interpolation between the two table rows
			
				scalar Adjust5 = 1
		
				if NeedAdjust5==1 {
				
					scalar Adjust5 = A5_below + (A5_above - A5_below) * ((FF-F5_below)/(F5_above - F5_below))
													
				}

			* Guard: a missing factor (e.g. F5_above equal to F5_below, or an input left unset)
			* would make the adjusted bounds missing, and a missing lower bound satisfies
			* "0 < lower", which would wrongly report significance.
				if missing(scalar(Adjust5)) {
					display as error "tFcorrections: 5% adjustment factor is missing; check F5_below, F5_above, A5_below, A5_above"
					exit 198
				}
			
			* Calc 5% adjusted standard error 
				$reg_command  // runs the ivregress command from just before this program was invoked
				scalar  se_adj5 = _se[treated]*Adjust5
								
			* Calc 5% adjusted confidence interval
				scalar middle = _b[treated]   // coefficient estimate
				scalar lower5 = r(table)[5,1]  // unadjusted lower bound
				scalar upper5 = r(table)[6,1]  // unadjusted upper bound
				scalar dist = _b[treated] - lower5   // half-width of the unadjusted interval
				scalar lower5_adj5 = middle - dist*Adjust5
				scalar upper5_adj5 = middle + dist*Adjust5 
			
			* Determine whether significant at 5% level
				scalar signif5 = (0 < lower5_adj5) | (0 > upper5_adj5) 

					
		* Calc 1% adjusted std error, confidence interval, and statis significance
		
			* Determine adjustment factor 
			
				scalar Adjust1 = 1
		
				if NeedAdjust1==1 {
				
					scalar Adjust1 = A1_below + (A1_above - A1_below) * ((FF-F1_below)/(F1_above - F1_below))
													
				}

			* Same guard as for the 5% factor
				if missing(scalar(Adjust1)) {
					display as error "tFcorrections: 1% adjustment factor is missing; check F1_below, F1_above, A1_below, A1_above"
					exit 198
				}
			

			* Calc 1% adjusted standard error 
				 local reg_99 "$reg_command , level(99)"
				`reg_99'
				scalar  se_adj1 = _se[treated]*Adjust1
								
			* Calc 1% adjusted confidence interval
				
				scalar middle = _b[treated]   // coefficient estimate
				scalar lower = r(table)[5,1]  // unadjusted lower bound
				scalar upper = r(table)[6,1]  // unadjusted upper bound
				scalar dist = _b[treated] - lower 
				scalar lower1_adj1 = middle - dist*Adjust1
				scalar upper1_adj1 = middle + dist*Adjust1
			
								
			* Determine whether significant at 1% level
				scalar signif1 = (0 < lower1_adj1) | (0 > upper1_adj1) 
		
							
		* Calc 10% adjusted std error, confidence interval, and statis significance
		
			* Use 5% adjustment factor and interpret as "conservative" (in most contexts)
			* Since, for a given F-value, the adjustments for 1% significance in Table 3B of Lee et al. (2022) are larger than the adjustments for 5% 
			* significance in Table 3A of Lee et al. (2022), using the 5% adjustment as the 10% adjustment is likely too large of an adjustment, and so the
			* increases in the standard errors and confidence intervals for 10% significance will be "too large" and thus will tend to reduce the probability
			* of rejecting a null hypothesis of no significance at the 10% level.
			
				scalar Adjust10 = Adjust5
				
			* Calc 10% adjusted standard error 
				local reg_90 "$reg_command , level(90)"
				`reg_90'
											
				scalar se_adj10 = _se[treated]*Adjust10
			
			
			* Calc 10% adjusted confidence interval (conservative)
				scalar middle = _b[treated]   // coefficient estimate
				scalar lower = r(table)[5,1]  // unadjusted lower bound
				scalar upper = r(table)[6,1]  // unadjusted upper bound
				scalar dist = _b[treated] - lower 
				scalar lower10_adj10 = middle - dist*Adjust10
				scalar upper10_adj10 = middle + dist*Adjust10
							
			* Determine whether significant at 10% level
				scalar signif10 = (0 < lower10_adj10) | (0 > upper10_adj10) 
			
		* Calculate approximate adjusted p-value
		
			* We use .05 adjustments to standard errors for adjusting p-values that are >=.05.
			* 	For unadjusted p-values near .05, these adjustments are approximately correct.
			* 	For unadjusted p-values that are substantially larger, these are upper bound adjustments (because the actual adjustments would 
			* 	inflate standard errors by less)
			* We use the .01 adjustments to standard errors for adjusting p-values that are <.05
			*    For unadjusted p-values around .01, these adjustments are approximately correct.
			*    For unadjusted p-values between .01 and .05, these are conservative.
			*    For unadjusted p-values less than .01 (not relevant for our results), the true corrections would be larger.
			
			* raw pvalue
			$reg_command  // runs the ivregress command from just before this program was invoked
			test treated 
			scalar raw_pval = r(p)
								
			$reg_command  // re-run at the default level so that _se[treated] and e(df_r) come from the default-level fit
			
			* replicating unadjusted p-value
			scalar tt = middle/_se[treated]
			scalar dfx = e(df_r)   // e(df_r) stores the design df from the regression
			scalar p_noadj = 2*ttail(dfx, abs(tt))
			
			* Create adjusted t-ratio and p-value: the 5% factor when the raw p-value is >= .05,
			* otherwise the 1% factor
			if raw_pval >=0.05 {
				scalar t_adj = middle/se_adj5 
			}
			else {
				scalar t_adj = middle/se_adj1 
			}
			scalar p_adj = 2*ttail(dfx,abs(t_adj))
						
		* Work out stars (one for each of the 10%, 5%, and 1% tests passed)
		
			scalar Nstars= (signif10==1) + (signif5==1) + (signif1==1)
		
		* Report
			di "     "
			di "     "
			di "Unadjusted standard error=     "  %9.3f _se[treated]
			di "Adjusted 5% standard error =    "  %9.3f se_adj5
			di "Unadjusted 95% confidence interval  [ " %9.3f lower5  " , " %9.3f upper5 " ]"
			di "Adjusted 95% confidence interval  [ " %9.3f lower5_adj5 " , " %9.3f upper5_adj5 " ]"
			di "Adjusted number of stars=  " Nstars
			di "Unadjusted and adjusted p-values (adjusted upper bound if unadjusted value>.05):  "  %9.6f p_noadj "  " %9.6f p_adj 
			
	end 


****************************************************************************

****CREATE A SHORT PROGRAM JUST FOR DISPLAYING ADJUSTED RESULTS 

****************************************************************************	
	
	* Drop any earlier definition of the program so this section can be re-run.
	* ("capture drop" would try to drop a VARIABLE named print_res and leave the program in place.)
	capture program drop print_res

	* print_res: displays the adjusted results left behind as scalars by tFcorrections.
	* Requires an active estimation with a coefficient named "treated" (for _se[treated])
	* and the scalars se_adj5, lower5, upper5, lower5_adj5, upper5_adj5, Nstars, p_noadj, p_adj.
	program print_res

		di "     "
		di "     "
		di "Unadjusted standard error=     "  %9.3f _se[treated]
		di "Adjusted 5% standard error =    "  %9.3f se_adj5
		di "Unadjusted 95% confidence interval  [ " %9.3f lower5  " , " %9.3f upper5 " ]"
		di "Adjusted 95% confidence interval  [ " %9.3f lower5_adj5 " , " %9.3f upper5_adj5 " ]"
		di "Adjusted number of stars=  " Nstars
		di "Unadjusted and adjusted p-values (adjusted upper bound if unadjusted value>.05):  "  %9.6f p_noadj "  " %9.6f p_adj 
	end
	

********************************************************************************

****LATE IMPACTS ON TEACHING PRACTICES BY STUDENT REPORT (APPENDIX TABLES S4 AND S5)

********************************************************************************

	* BUILD THE TEACHER-LEVEL FILE: TEACHER CHARACTERISTICS PLUS SSRP TRAINING DATA

			use Teacher_c, clear

			* Indicator for 5 or fewer years of experience; left missing when t_exper is "."
			generate texp05yr = (t_exper <= 5) if t_exper != .

			* Variables carried forward from the teacher questionnaire
			local tchr_keep teacherid t_perm texp05yr t_g9_math t_g10_math t_g9_sci t_g10_sci
			keep `tchr_keep'
			sort teacherid

			* Add the SSRP data
			* (authors' log note: 48 unmatched, 2 master and 46 using)
			merge 1:1 teacherid using SSRPdata
			list teacherid if _merge == 1
			* Master-only teachers are retained: the authors left the following drop disabled
			*drop if _m==1 //2

			* Keeping the named variables also removes _merge
			keep `tchr_keep' SSRP*
			sort teacherid
			tempfile tchrtemp
			save "`tchrtemp'" , replace
			
			* Prepare SSDP attendance data
			use SSDP_VA_attendance, clear
			count if teacherid=="." //Cannot match these 23 teachers to students
			drop if teacherid=="." //23
			
			* Following are fixes for Jumla district (district 63): the recorded number of
			* training days is set to missing for teachers flagged as trained
			replace ssdp_math_days=. if district==63 & ssdp_math==1 
			replace ssdp_sci_days=. if district==63 & ssdp_sci==1
			
			* Define math training: coded 1 when recorded days equal 6 or 10, and also when
			* the teacher is flagged as trained but days are missing.
			* NOTE(review): the authors describe this as "6 or more days"; the code only
			* matches the exact values 6 and 10 -- confirm no other values >= 6 occur.
			tab ssdp_math_days ssdp_math, mi
			gen ssdp_m_t=(ssdp_math_days==6 | ssdp_math_days==10)
			replace ssdp_m_t=1 if ssdp_math==1 & ssdp_math_days==.
			label var ssdp_m_t "Math teacher had SSDP training"
			tab ssdp_m_t
			
			* Define science training: coded 1 when recorded days equal 9 or 10, and also when
			* the teacher is flagged as trained but days are missing.
			tab ssdp_sci_days ssdp_sci, mi
			gen ssdp_s_t=(ssdp_sci_days==9 | ssdp_sci_days==10)
			replace ssdp_s_t=1 if ssdp_sci==1 & ssdp_sci_days==.
			* The label belongs on the science indicator (it was previously attached to
			* ssdp_m_t, overwriting the math label and leaving ssdp_s_t unlabelled)
			label var ssdp_s_t "Science teacher had SSDP training"
			tab ssdp_s_t
			
			* Merge teacher questionnaire data into SSDP attendance data
			sort teacherid
			merge 1:1 teacherid using "`tchrtemp'" 
			//122 unmatched (122 master)
			//Note: More obs. in SSDP training than in other 2 datasets
			drop _m 
			sort teacherid
			* Overwrite the teacher tempfile with the combined teacher-level data
			save "`tchrtemp'" , replace
			
	* PREPARE DATASETS THAT MATCH GRADE 9 AND 10 STUDENTS TO THEIR MATH AND SCIENCE TEACHERS 
	* AND KEEP TEACHER VARIABLES AT THE STUDENT LEVEL
	* If a student is matched to more than one teacher, average teacher characteristics.

			* GRADE 9: LINK EACH STUDENT TO HIS OR HER MATH TEACHER(S)
			use T_stu_sections09, clear
			numlabel, add

			* m_match_type: 1=matched to 1 teacher, 2=not matched to any, 3= matched to two or more teachers
			tabulate m_match_type
			keep if m_match_type != 2   // removes the students not matched to any math teacher (745 in the authors' log)

			* Students with two teachers get a second, duplicate record so that each record
			* can carry one teacher id (mathteacherid is "blank" when 2 different teachers exist)
			unique stu_serial   // authors' log: 6056 students and observations
			expand 2 if m_match_type == 3   // authors' log: 154 more obs created

			* Number the records within student; record 1 takes teacher A, record 2 teacher B
			sort stu_serial
			by stu_serial: generate stuobnum = _n
			tabulate stuobnum
			replace mathteacherid = cond(stuobnum == 1, mathteacheridA, mathteacheridB) ///
				if m_match_type == 3 & inlist(stuobnum, 1, 2)
			rename mathteacherid teacherid
			sort teacherid
			keep schoolid district stu_serial teacherid m_match_type

			* Attach the teacher-level data (authors' log: 400 unmatched)
			merge m:1 teacherid using "`tchrtemp'"
			local teachflags t_g9_math t_g10_math t_g9_sci t_g10_sci
			summarize `teachflags' if _merge == 2   /* don't teach gr 9 math */
			summarize `teachflags' if _merge == 3   /* teach grade 9 math */
			drop if _merge == 2   // teachers with no grade 9 math students in this file

			rename (t_perm texp05yr SSRPmath) (mat_t_perm mat_t_exp05yr mat_t_ssrp_train)
			keep stu_serial mat_t_* ssdp_m_t m_match_type

			* One record per student: teacher characteristics are averaged over the
			* student's teachers
			sort stu_serial
			unique stu_serial   // authors' log: 6056 students, 6210 records
			collapse (mean) mat_t_perm mat_t_exp05yr mat_t_ssrp_train ssdp_m_t m_match_type, by(stu_serial)
			unique stu_serial   // authors' log: 6056 students and records
			label variable mat_t_perm "Math teacher has permanent contract"
			label variable mat_t_exp05yr "Math teacher experience 5 years or less"
			label variable mat_t_ssrp_train "Math teacher has had SSRP training"
			tempfile g09mattc
			save "`g09mattc'" , replace
			
			* GRADE 10: LINK EACH STUDENT TO HIS OR HER MATH TEACHER(S)

			use T_stu_sections10, clear
			numlabel, add

			* Records without a student id cannot be linked (authors' log: none present)
			count if stu_serial == ""
			keep if stu_serial != ""

			tabulate m_match_type
			keep if m_match_type != 2   // removes students not matched to any math teacher (303 in the authors' log)

			* Students with two teachers get a second, duplicate record so that each record
			* can carry one teacher id (mathteacherid is "blank" when 2 different teachers exist)
			unique stu_serial   // authors' log: 5530 students and records
			expand 2 if m_match_type == 3   // authors' log: 137 records created

			* Number the records within student; record 1 takes teacher A, record 2 teacher B
			sort stu_serial
			by stu_serial: generate stuobnum = _n
			tabulate stuobnum
			replace mathteacherid = cond(stuobnum == 1, mathteacheridA, mathteacheridB) ///
				if m_match_type == 3 & inlist(stuobnum, 1, 2)
			rename mathteacherid teacherid
			sort teacherid
			keep schoolid district stu_serial teacherid m_match_type

			* Attach the teacher-level data (authors' log: 838 unmatched, 420 master and 418 using)
			merge m:1 teacherid using "`tchrtemp'"

			local teachflags t_g9_math t_g10_math t_g9_sci t_g10_sci
			summarize `teachflags' if _merge == 2   /* a few teach gr 10 math */
			summarize `teachflags' if _merge == 3   /* teach grade 10 math */
			count if _merge == 2 & t_g10_math == 1   // authors' log: 13
			drop if _merge == 2   // unmatched teachers, who mostly don't teach grade 10 math

			rename (t_perm texp05yr SSRPmath) (mat_t_perm mat_t_exp05yr mat_t_ssrp_train)
			keep stu_serial mat_t_* ssdp_m_t m_match_type

			* One record per student: teacher characteristics are averaged over the
			* student's teachers
			sort stu_serial
			unique stu_serial   // authors' log: 5530 students, 5667 records
			collapse (mean) mat_t_perm mat_t_exp05yr mat_t_ssrp_train ssdp_m_t m_match_type, by(stu_serial)
			unique stu_serial   // authors' log: 5530 records
			label variable mat_t_perm "Math teacher has permanent contract"
			label variable mat_t_exp05yr "Math teacher experience 5 years or less"
			label variable mat_t_ssrp_train "Math teacher has had SSRP training"
			tempfile g10mattc
			save "`g10mattc'" , replace

			* GRADE 9: LINK EACH STUDENT TO HIS OR HER SCIENCE TEACHER(S)
			use T_stu_sections09, clear
			numlabel, add
			tabulate s_match_type

			* Inspect the teacher ids of unmatched students before removing them
			tab1 sciteacherid sciteacheridA sciteacheridB if s_match_type == 2
			keep if s_match_type != 2   // removes students not matched to any science teacher (802 in the authors' log)

			* Students with two teachers get a second, duplicate record so that each record
			* can carry one teacher id (sciteacherid is "blank" when 2 different teachers exist)
			list sciteacheridA sciteacheridB if sciteacherid == ".d"
			tabulate sciteacherid if !(sciteacheridA == "" & sciteacheridB == "")
			unique stu_serial   // authors' log: 5999 students and records
			expand 2 if s_match_type == 3   // authors' log: 281 records created

			* Number the records within student; record 1 takes teacher A, record 2 teacher B
			sort stu_serial
			by stu_serial: generate stuobnum = _n
			tabulate stuobnum
			replace sciteacherid = cond(stuobnum == 1, sciteacheridA, sciteacheridB) ///
				if s_match_type == 3 & inlist(stuobnum, 1, 2)
			rename sciteacherid teacherid
			sort teacherid
			keep schoolid district stu_serial teacherid s_match_type

			* Attach the teacher-level data (authors' log: 399 unmatched)
			merge m:1 teacherid using "`tchrtemp'"
			local teachflags t_g9_math t_g10_math t_g9_sci t_g10_sci
			summarize `teachflags' if _merge == 2   /* don't teach gr 9 sci */
			summarize `teachflags' if _merge == 3   /* teach grade 9 science */
			drop if _merge == 2   // teachers with no grade 9 science students in this file

			rename (t_perm texp05yr SSRPsci) (sci_t_perm sci_t_exp05yr sci_t_ssrp_train)
			keep stu_serial sci_t_* ssdp_s_t s_match_type

			* One record per student: teacher characteristics are averaged over the
			* student's teachers
			sort stu_serial
			unique stu_serial   // authors' log: 5999 students, 6280 records
			collapse (mean) sci_t_perm sci_t_exp05yr sci_t_ssrp_train ssdp_s_t s_match_type, by(stu_serial)
			unique stu_serial   // authors' log: 5999 records and students
			label variable sci_t_perm "Science teacher has permanent contract"
			label variable sci_t_exp05yr "Science teacher experience 5 years or less"
			label variable sci_t_ssrp_train "Science teacher has had SSRP training"
			tempfile g09scitc
			save "`g09scitc'" , replace

			* GRADE 10: LINK EACH STUDENT TO HIS OR HER SCIENCE TEACHER(S)
			use T_stu_sections10, clear
			numlabel, add

			* Records without a student id cannot be linked (authors' log: none present)
			count if stu_serial == ""
			keep if stu_serial != ""

			tabulate s_match_type

			* Inspect the teacher ids of unmatched students before removing them
			tab1 sciteacherid sciteacheridA sciteacheridB if s_match_type == 2
			keep if s_match_type != 2   // removes students not matched to any science teacher (234 in the authors' log)

			* Students with two teachers get a second, duplicate record so that each record
			* can carry one teacher id (sciteacherid is "blank" when 2 different teachers exist)
			list sciteacheridA sciteacheridB if sciteacherid == ".d"
			tabulate sciteacherid if !(sciteacheridA == "" & sciteacheridB == "")
			unique stu_serial   // authors' log: 5599 students and records
			expand 2 if s_match_type == 3   // authors' log: 320 observations created

			* Number the records within student; record 1 takes teacher A, record 2 teacher B
			sort stu_serial
			by stu_serial: generate stuobnum = _n
			tabulate stuobnum
			replace sciteacherid = cond(stuobnum == 1, sciteacheridA, sciteacheridB) ///
				if s_match_type == 3 & inlist(stuobnum, 1, 2)
			rename sciteacherid teacherid
			sort teacherid
			keep schoolid district stu_serial teacherid s_match_type

			* Attach the teacher-level data (authors' log: 975 unmatched, 558 master and 417 using)
			merge m:1 teacherid using "`tchrtemp'"

			local teachflags t_g9_math t_g10_math t_g9_sci t_g10_sci
			summarize `teachflags' if _merge == 2   /* mostly don't teach gr 10 sci */
			summarize `teachflags' if _merge == 3   /* teach grade 10 science */

			count if _merge == 2 & t_g10_sci == 1   // authors' log: 12, Teachers for 12 kids w/ missing ID
			drop if _merge == 2   // unmatched teachers, who mostly don't teach grade 10 science

			rename (t_perm texp05yr SSRPsci) (sci_t_perm sci_t_exp05yr sci_t_ssrp_train)
			keep stu_serial sci_t_* ssdp_s_t s_match_type

			* One record per student: teacher characteristics are averaged over the
			* student's teachers
			sort stu_serial
			unique stu_serial   // authors' log: 5599 students 5919 records
			collapse (mean) sci_t_perm sci_t_exp05yr sci_t_ssrp_train ssdp_s_t s_match_type, by(stu_serial)
			unique stu_serial   // authors' log: 5599 students and records
			label variable sci_t_perm "Science teacher has permanent contract"
			label variable sci_t_exp05yr "Science teacher experience 5 years or less"
			label variable sci_t_ssrp_train "Science teacher has had SSRP training"
			tempfile g10scitc
			save "`g10scitc'" , replace
			
			
	* BUILD THE STUDENT-LEVEL ANALYSIS FILE FROM THE STUDENT QUESTIONNAIRES

			* Grade 9 students, with math and then science teacher information attached.
			* Most of the "students" who do not match to endline teachers here are baseline
			* students not present at endline.
			use Grade09_c, clear
			sort stu_serial
			merge 1:1 stu_serial using "`g09mattc'", nogenerate
			merge 1:1 stu_serial using "`g09scitc'", nogenerate

			* Written to the current (datasets) folder so it can be appended below
			save gr9_temp, replace

			* Grade 10 students, with math and then science teacher information attached.
			* Again, most unmatched "students" are baseline students not present at endline.
			use Grade10_c, clear
			sort stu_serial
			merge 1:1 stu_serial using "`g10mattc'", nogenerate
			merge 1:1 stu_serial using "`g10scitc'", nogenerate

			* Stack the two grades
			append using gr9_temp
			count

			* Remove students who were in baseline but not endline
			* (authors' log: 5950 observations dropped)
			drop if s_present == 5

			* Add school-level study arm and survey design variables
			* (authors' log: all matched); _merge from this step is left in the data
			sort schoolid
			merge m:1 schoolid using "`sch_temp'"

			
	* CHECK VARIATION IN TEACHER PRACTICE VARIABLES, COMBINE CATEGORIES THAT HAVE <5%, CHECK FOR MISSING VALUES
	* Throughout, the response code 9 is converted to missing before anything else is done.

			numlabel, add

			* Teacher gives homework
			foreach v of varlist ma_hwfreq sc_hwfreq {
				replace `v' = . if `v' == 9
			}
			tab1 ma_hwfreq sc_hwfreq
			* Math: codes 1-3 are folded into 4, then everything is shifted down by 3.
			* Science: code 1 is folded into 2, then everything is shifted down by 1.
			replace ma_hwfreq = cond(ma_hwfreq <= 3, 4, ma_hwfreq) - 3
			replace sc_hwfreq = cond(sc_hwfreq == 1, 2, sc_hwfreq) - 1
			tab1 ma_hwfreq sc_hwfreq, nolabel
			//Relevant responses (Math) are: Two or three times a week or less or never (lower code); Every day (higher code)
			//Relevant responses (science) are: Once a week or less (1); once a week (2) ; Two or three times a week (3); Every day (4)

			* Teacher checks homework, and teacher returns homework w/ corrections.
			* The two items are recoded the same way:
			*   math:    code 1 is folded into 2, then everything is shifted down by 1
			*   science: code 3 is folded into 2, then codes 4 and above are shifted down by 1
			foreach item in hwcheck hwreturned {
				replace ma_`item' = . if ma_`item' == 9
				replace sc_`item' = . if sc_`item' == 9
				tab1 ma_`item' sc_`item'
				replace ma_`item' = 2 if ma_`item' == 1
				replace sc_`item' = 2 if sc_`item' == 3
				tab1 ma_`item' sc_`item'
				replace ma_`item' = ma_`item' - 1
				replace sc_`item' = sc_`item' - 1 if sc_`item' >= 4
				tab1 ma_`item' sc_`item', nolabel
			}
			//Relevant responses (Math) are: less than Once a week  or never (1); once a week (2) ; Two or three times a week (3); Every day (4)
			//Relevant responses (science) are: Never (1); Once a week or less (2); Two or three times a week (3); Every day (4)

			* Teacher asks questions, interacts with any student
			foreach v of varlist ma_qnask sc_qnask {
				replace `v' = . if `v' == 9
			}
			tab1 ma_qnask sc_qnask
			* Both subjects: code 1 is folded into 2, then everything is shifted down by 1
			foreach v of varlist ma_qnask sc_qnask {
				replace `v' = cond(`v' == 1, 2, `v') - 1
			}
			tab1 ma_qnask sc_qnask
			//Relevant responses (math and science) are: less than once a week or never (1) Once a week (2); Two or three times a week (3); Every day (4)

			* Teacher requires group study (categories unchanged apart from 9 -> missing)
			tab1 ma_groupstudy sc_groupstudy
			foreach v of varlist ma_groupstudy sc_groupstudy {
				replace `v' = . if `v' == 9
			}
			tab1 ma_groupstudy sc_groupstudy

			* Teacher demonstrates w/ objects (categories unchanged apart from 9 -> missing)
			tab1 ma_demostrations1 sc_demostrations1
			foreach v of varlist ma_demostrations1 sc_demostrations1 {
				replace `v' = . if `v' == 9
			}

			* Teacher demonstrates w/ diagrams (categories unchanged apart from 9 -> missing)
			tab1 ma_demostrations2 sc_demostrations2
			foreach v of varlist ma_demostrations2 sc_demostrations2 {
				replace `v' = . if `v' == 9
			}
			tab1 ma_demostrations2 sc_demostrations2

	* DECLARE THE SURVEY DESIGN FOR THE DESCRIPTIVE STATISTICS AND REGRESSIONS BELOW

			svyset schoolid, strata(dist_stratum) weight(sch_wght)

	* KEEP A COPY OF THE FULL STUDENT FILE (SAVED BEFORE THE P-VALUE COLLECTORS ARE ADDED)
	* SO IT CAN BE USED AGAIN FOR WORKING WITH SCIENCE TEACHER PRACTICES

			tempfile save_all
			save "`save_all'", replace

	* COLLECTORS FOR P-VALUES (FOR LATER USE IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
	* Each specification writes its results into the next observation (row j) of these variables.

			* p1 receives unadjusted p-values, p2 adjusted p-values
			forvalues k = 1/2 {
				generate p`k' = .
			}
			* row_spec names the specification stored in each row, so the collected
			* p-values can be checked against the right test
			generate str20 row_spec = "to be filled in"
			* j counts specifications; it is increased by 1 before each p-value is stored
			local j 0
		
				
	* FOR ONLINE APPENDIX TABLE S4, STUDENT-REPORTED TEACHING PRACTICES, MATH TEACHERS
			
			* keeping only sample for LATE: students whose math teacher's SSDP training status is known
			gen treated = ssdp_m_t 
			keep if treated~=.
				
			* FREQUENCY THAT MATH TEACHER GIVES HOMEWORK 
			
				capture drop outcome_dichot
				gen outcome_dichot=ma_hwfreq
				tab outcome_dichot
																	
				* sample proportions
				svy, over(treat): proportion outcome_dichot if treated~=., cformat(%9.3g)  // put proportions for "2 0" and "2 1" in table
										
				* ITT (not included in table)
				tab outcome_dichot
				replace outcome_dichot = outcome_dichot - 1   // shift the two categories down by 1 for the linear models below
				svy: reg outcome_dichot treat district#stratum
				test treat 						
					
				* IV LATE estimate with correction for standard errors and confidence intervals following Lee et al. 2022
					
					* For regressions requiring sample selection, define in_sample
						capture drop in_sample
						gen in_sample= outcome_dichot~=.
										
					* first stage reg
						svy: reg treated treat district#stratum if in_sample==1   // 11,075 observations
						
					* grab F stat from first stage for use in calculating correction to second stage standard errors
						test treat 
						scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections 
										// of second stage standard errors
						di FF   // User needs to choose values from Tables in the Lee paper based on the size of this F stat.
						// 238.6142 
						
																
					* unadjusted second stage estimation 
						svy: ivregress 2sls outcome_dichot district#stratum (treated = treat) if in_sample==1
						estimates store stu_math_homework
						scalar rsquared=e(r2)
																		
					* run program for adjustments 
						
						capture drop testscore 
						gen testscore = outcome_dichot   // outcome is called testscore in program code (but is a teaching practice here)
						global reg_command "`e(cmdline)'"   // must be captured while the ivregress results are still active
						
						/* When building up the do-file, the following scalar values were filled in 
						after observing the value of FF above, according to these rules:
						If FF is greater than the highest F value listed in Table 3A or Table 3B
						of Lee et al., fill in the highest F stat value
						in the table for both the "below" and "above" numbers. For example, for FF=237.2, 
						fill in 104.67 for F5_below and F5_above */
						scalar F5_below = 	104.67  //  F in table 3a just below FF 
						scalar F5_above = 	104.67  // F in table 3a just above FF
						scalar A5_below = 	1.00   // adjustment factor in 3a for F5below
						scalar A5_above = 	1.00   // adjustment factor in 3a for F5above
						scalar F1_below = 	174.370		// F in table 3b just below FF
						scalar F1_above = 	252.342		// F in table 3b just above FF
						scalar A1_below =	1.097		// adjustment factor in 3b for F1below
						scalar A1_above =	1.059		// adjustment factor in 3b for F1above

						* Guard: the values above were entered by hand for the FF observed when the
						* do-file was written.  Stop if the FF computed now falls outside them, rather
						* than silently interpolating with the wrong table rows.
						* (104.67 and 252.342 are the cut-offs tFcorrections uses to decide that no
						* 5% / 1% adjustment is needed.)
						if !( scalar(FF) >= 104.67 | inrange(scalar(FF), scalar(F5_below), scalar(F5_above)) ) {
							display as error "First-stage F is not bracketed by F5_below and F5_above: update the Table 3A values"
							exit 459
						}
						if !( scalar(FF) > 252.342 | inrange(scalar(FF), scalar(F1_below), scalar(F1_above)) ) {
							display as error "First-stage F is not bracketed by F1_below and F1_above: update the Table 3B values"
							exit 459
						}
														
						tFcorrections // running program
								
					* store p-value for later multiple hypothesis testing correction
						local j = `j' + 1
						replace p1 = p_noadj if _n==`j'
						replace p2 = p_adj if _n==`j'
						replace row_spec = "stu math give homework" if _n==`j'
			
					* display results for Table S4 Gives homework all days 
						di "OUTCOME IS:  " row_spec[`j']
						etable, column(index) estimates(stu_math_homework) showstars showstarsnote ///
						keep(treated) cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) ///
						mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
						// display unadjusted results
						di "r-squared =  " rsquared  // The etable command rounds this too much. Reporting again with less rounding.
							
						print_res  // display adjusted results
						// use adjusted p-value in table S4
						
					
						
			* FREQUENCY THAT MATH TEACHER CHECKS HOMEWORK
				capture drop outcome
				gen outcome=ma_hwcheck 
				tab outcome
															
				* sample proportions
				svy, over(treat): proportion outcome if treated~=., cformat(%9.3g)  
					
				* ITT 
				tab outcome
				svy: oprobit outcome treat district#stratum
				test treat
									
				* LATE 
				* first stage and create residual
				svy: reg treated treat district#stratum if outcome~=. 
				predict tchrtrtresid, r
				test treat  
				
				* second stage: ordered probit with the first-stage residual included as a regressor
				svy: oprobit outcome treated tchrtrtresid district#stratum   // residual not significant
				test treated  // p-value for Table S4
				scalar late_p = r(p)   // captured now, while the result of "test treated" is still in r()
				test tchrtrtresid 
				drop tchrtrtresid 
									
				* store p-value
				* The stored value is the p-value of "treated" in the second stage above.  The
				* previous code ran "test treat" here, but the active model contains "treated",
				* not "treat".
				* NOTE(review): if the ITT p-value was intended instead, capture r(p) after the
				* ITT "test treat" above and store that.
				local j = `j' + 1
				replace p1= scalar(late_p) if _n==`j'
				replace p2= scalar(late_p) if _n==`j'  // no tF adjustment is applied to this estimate, so adjusted and unadjusted p values are the same
				replace row_spec = "stu math hwcheck" if _n==`j'
				
				
				* FREQUENCY THAT MATH TEACHER RETURNS HOMEWORK WITH CORRECTIONS
				capture drop outcome
				gen outcome=ma_hwreturned 
				tab outcome
																			
				* sample proportions
				svy, over(treat): proportion outcome if treated~=., cformat(%9.3g)  
					
				* ITT 
				tab outcome
				svy: oprobit outcome treat district#stratum
				test treat
									
				* LATE 
				* first stage and create residual
				svy: reg treated treat district#stratum if outcome~=. 
				predict tchrtrtresid, r
				test treat  
				
				* second stage: ordered probit with the first-stage residual included as a regressor
				svy: oprobit outcome treated tchrtrtresid district#stratum // resid insignificant
				test treated  // pvalue for table S4
				scalar late_p = r(p)   // captured now, while the result of "test treated" is still in r()
				test tchrtrtresid 
				drop tchrtrtresid 
				
				* store p-value
				* The stored value is the p-value of "treated" in the second stage above.  The
				* previous code ran "test treat" here, but the active model contains "treated",
				* not "treat".
				* NOTE(review): if the ITT p-value was intended instead, capture r(p) after the
				* ITT "test treat" above and store that.
				local j = `j' + 1
				replace p1= scalar(late_p) if _n==`j'
				replace p2= scalar(late_p) if _n==`j'  // no tF adjustment is applied to this estimate, so adjusted and unadjusted p values are the same
				replace row_spec = "stu math hwreturn" if _n==`j'
					

				* MATH TEACHER: HOW OFTEN THE TEACHER ASKS QUESTIONS / INTERACTS WITH STUDENTS (student report)
				* Load the item into the generic working variable used by the commands below.
				capture drop outcome
				generate outcome = ma_qnask
				tabulate outcome

				* Design-based response proportions by study arm
				svy, over(treat): proportion outcome if treated != ., cformat(%9.3g)

				* Intent-to-treat: ordered probit of the response on assignment
				tabulate outcome
				svy: oprobit outcome treat district#stratum
				test treat

				* LATE via control function, step 1: linear first stage for training
				* receipt on observations with a non-missing outcome; keep the residual
				svy: regress treated treat district#stratum if outcome != .
				predict tchrtrtresid, residuals
				test treat

				* LATE via control function, step 2: ordered probit with the residual added
				svy: oprobit outcome treated tchrtrtresid district#stratum  // residual was insignificant
				test treated       // p-value reported in Table S4
				test tchrtrtresid
				drop tchrtrtresid

				* Write the p-value and a label into the next free row of p1/p2/row_spec.
				* NOTE(review): treat is not a regressor in the second stage that is
				* in memory at this point - confirm which coefficient the test picks up.
				local ++j
				test treat
				replace p1 = r(p) if _n == `j'
				replace p2 = r(p) if _n == `j'  // no adjustment applied, so p2 duplicates p1
				replace row_spec = "stu math qnask" if _n == `j'

				* FREQUENCY THAT MATH TEACHER REQUIRES GROUP STUDY
				* Student-reported item ma_groupstudy, analysed as an ordered response:
				* proportions by arm, ITT ordered probit, and a control-function LATE
				* (linear first stage, residual added to the ordered probit).
				capture drop outcome
				gen outcome=ma_groupstudy 
				tab outcome
																			
				* sample proportions
				svy, over(treat): proportion outcome if treated~=., cformat(%9.3g)  
				
				* ITT 
				tab outcome
				svy: oprobit outcome treat district#stratum
				test treat
									
				* LATE 
				* first stage and create residual (restricted to obs with a non-missing outcome)
				svy: reg treated treat district#stratum if outcome~=. 
				predict tchrtrtresid, r
				test treat  
				
				* second stage 
				svy: oprobit outcome treated tchrtrtresid district#stratum // resid insig (p-value .102)
				test treated // p-value for table S4
				test tchrtrtresid  
				drop tchrtrtresid 
				
				* store p-value
				* NOTE(review): the model in memory here is the second stage, which has
				* treated (not treat) as a regressor - confirm which coefficient this test uses.
				test treat 
				local j = `j' + 1
				replace p1= r(p) if _n==`j'
				replace p2= r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
				* Label corrected: it previously repeated the "stu math qnask" label of the
				* preceding outcome, so two rows of the p-value listing looked identical.
				replace row_spec = "stu math groupstudy" if _n==`j'
				
				* MATH TEACHER: HOW OFTEN THE TEACHER DEMONSTRATES WITH OBJECTS (student report)
				* Load the item into the generic working variable used by the commands below.
				capture drop outcome
				generate outcome = ma_demostrations1
				tabulate outcome

				* Design-based response proportions by study arm
				svy, over(treat): proportion outcome if treated != ., cformat(%9.3g)

				* Intent-to-treat: ordered probit of the response on assignment
				tabulate outcome
				svy: oprobit outcome treat district#stratum
				test treat

				* LATE via control function, step 1: linear first stage for training
				* receipt on observations with a non-missing outcome; keep the residual
				svy: regress treated treat district#stratum if outcome != .
				predict tchrtrtresid, residuals
				test treat

				* LATE via control function, step 2: ordered probit with the residual added
				svy: oprobit outcome treated tchrtrtresid district#stratum  // residual was insignificant
				test treated       // p-value reported in Table S4
				test tchrtrtresid
				drop tchrtrtresid

				* Write the p-value and a label into the next free row of p1/p2/row_spec.
				* NOTE(review): treat is not a regressor in the second stage that is
				* in memory at this point - confirm which coefficient the test picks up.
				local ++j
				test treat
				replace p1 = r(p) if _n == `j'
				replace p2 = r(p) if _n == `j'  // no adjustment applied, so p2 duplicates p1
				replace row_spec = "stu math demonstrations1" if _n == `j'
				
						
				* MATH TEACHER: HOW OFTEN THE TEACHER DEMONSTRATES WITH DIAGRAMS (student report)
				* Load the item into the generic working variable used by the commands below.
				capture drop outcome
				generate outcome = ma_demostrations2
				tabulate outcome

				* Design-based response proportions by study arm
				svy, over(treat): proportion outcome if treated != ., cformat(%9.3g)

				* Intent-to-treat: ordered probit of the response on assignment
				tabulate outcome
				svy: oprobit outcome treat district#stratum
				test treat

				* LATE via control function, step 1: linear first stage for training
				* receipt on observations with a non-missing outcome; keep the residual
				svy: regress treated treat district#stratum if outcome != .
				predict tchrtrtresid, residuals
				test treat

				* LATE via control function, step 2: ordered probit with the residual added
				svy: oprobit outcome treated tchrtrtresid district#stratum  // residual was insignificant
				test treated       // p-value reported in Table S4
				test tchrtrtresid
				drop tchrtrtresid

				* Write the p-value and a label into the next free row of p1/p2/row_spec.
				* NOTE(review): treat is not a regressor in the second stage that is
				* in memory at this point - confirm which coefficient the test picks up.
				local ++j
				test treat
				replace p1 = r(p) if _n == `j'
				replace p2 = r(p) if _n == `j'  // no adjustment applied, so p2 duplicates p1
				replace row_spec = "stu math demonstrations2" if _n == `j'
				
		* Reduce the data to the seven rows that hold collected p-values and their
		* labels, and park them in a temporary file.
				tempfile table_A4
				keep p1 p2 row_spec
				drop if _n > 7
				save "`table_A4'", replace
				clear

	* MULTIPLE HYPOTHESIS TESTING CORRECTIONS FOR TABLE S4

				* Reload the small p-value file and show it for checking.
				* No LATE p-value needed adjustment here, so p2 is not used below.
				use "`table_A4'", clear
				list

				* q-values from qqvalue with method(yekutieli), stored in q1.
				* Nothing was significant before correction, so these are a formality.
				qqvalue p1, method(yekutieli) qvalue(q1)
				list row_spec p1 q1
				

		
	* ONLINE APPENDIX TABLE S5: STUDENT-REPORTED TEACHING PRACTICES, SCIENCE TEACHERS

			* Reload the saved data and define treated from the science-training
			* indicator; observations with no value for it are removed.
			use "`save_all'", clear
			capture drop treated
			generate treated = ssdp_s_t
			drop if treated == .

			* CONTAINERS FOR P-VALUES (USED LATER IN MULTIPLE HYPOTHESIS TESTING CORRECTIONS)
			* p1 = unadjusted p-values, p2 = adjusted p-values, row_spec = label used to
			* verify that each stored p-value belongs to the intended outcome.
			foreach pv in p1 p2 {
				generate `pv' = .
			}
			generate str20 row_spec = "to be filled in"

			* Row counter: raised by one per outcome so that each p-value lands in its own observation.
			local j = 0
			
							
			* SCIENCE TEACHER: HOW OFTEN HOMEWORK IS GIVEN (student report)
			* Load the item into the generic working variable used by the commands below.
				capture drop outcome
				generate outcome = sc_hwfreq
				tabulate outcome

				* Design-based response proportions by study arm
				svy, over(treat): proportion outcome if treated != ., cformat(%9.3g)

				* Intent-to-treat: ordered probit of the response on assignment
				tabulate outcome
				svy: oprobit outcome treat district#stratum
				test treat

				* LATE via control function, step 1: linear first stage for training
				* receipt on observations with a non-missing outcome; keep the residual
				svy: regress treated treat district#stratum if outcome != .
				predict tchrtrtresid, residuals
				test treat

				* LATE via control function, step 2: ordered probit with the residual added
				svy: oprobit outcome treated tchrtrtresid district#stratum  // residual was insignificant
				test treated       // p-value reported in Table S5
				test tchrtrtresid
				drop tchrtrtresid

				* Write the p-value and a label into the next free row of p1/p2/row_spec.
				* NOTE(review): treat is not a regressor in the second stage that is
				* in memory at this point - confirm which coefficient the test picks up.
				local ++j
				test treat
				replace p1 = r(p) if _n == `j'
				replace p2 = r(p) if _n == `j'  // no adjustment applied, so p2 duplicates p1
				replace row_spec = "stu sci give hw" if _n == `j'
				
				
			* SCIENCE TEACHER: HOW OFTEN HOMEWORK IS CHECKED (student report)
			* Load the item into the generic working variable used by the commands below.
				capture drop outcome
				generate outcome = sc_hwcheck
				tabulate outcome

				* Design-based response proportions by study arm
				svy, over(treat): proportion outcome if treated != ., cformat(%9.3g)

				* Intent-to-treat: ordered probit of the response on assignment
				tabulate outcome
				svy: oprobit outcome treat district#stratum
				test treat

				* LATE via control function, step 1: linear first stage for training
				* receipt on observations with a non-missing outcome; keep the residual
				svy: regress treated treat district#stratum if outcome != .
				predict tchrtrtresid, residuals
				test treat

				* LATE via control function, step 2: ordered probit with the residual added
				svy: oprobit outcome treated tchrtrtresid district#stratum  // residual was not significant
				test treated       // p-value reported in Table S5
				test tchrtrtresid
				drop tchrtrtresid

				* Write the p-value and a label into the next free row of p1/p2/row_spec.
				* NOTE(review): treat is not a regressor in the second stage that is
				* in memory at this point - confirm which coefficient the test picks up.
				local ++j
				test treat
				replace p1 = r(p) if _n == `j'
				replace p2 = r(p) if _n == `j'  // no adjustment applied, so p2 duplicates p1
				replace row_spec = "stu sci check homework" if _n == `j'
				
									
			* SCIENCE TEACHER: HOW OFTEN HOMEWORK IS RETURNED WITH CORRECTIONS (student report)
			* Load the item into the generic working variable used by the commands below.
				capture drop outcome
				generate outcome = sc_hwreturned
				tabulate outcome

				* Design-based response proportions by study arm
				svy, over(treat): proportion outcome if treated != ., cformat(%9.3g)

				* Intent-to-treat: ordered probit of the response on assignment
				tabulate outcome
				svy: oprobit outcome treat district#stratum
				test treat

				* LATE via control function, step 1: linear first stage for training
				* receipt on observations with a non-missing outcome; keep the residual
				svy: regress treated treat district#stratum if outcome != .
				predict tchrtrtresid, residuals
				test treat

				* LATE via control function, step 2: ordered probit with the residual added
				svy: oprobit outcome treated tchrtrtresid district#stratum  // residual was insignificant
				test treated       // p-value reported in Table S5
				test tchrtrtresid
				drop tchrtrtresid

				* Write the p-value and a label into the next free row of p1/p2/row_spec.
				* NOTE(review): treat is not a regressor in the second stage that is
				* in memory at this point - confirm which coefficient the test picks up.
				local ++j
				test treat
				replace p1 = r(p) if _n == `j'
				replace p2 = r(p) if _n == `j'  // no adjustment applied, so p2 duplicates p1
				replace row_spec = "stu sci hw return" if _n == `j'
				

			* SCIENCE TEACHER: HOW OFTEN THE TEACHER ASKS QUESTIONS / INTERACTS WITH STUDENTS (student report)
			* Load the item into the generic working variable used by the commands below.
				capture drop outcome
				generate outcome = sc_qnask
				tabulate outcome

				* Design-based response proportions by study arm
				svy, over(treat): proportion outcome if treated != ., cformat(%9.3g)

				* Intent-to-treat: ordered probit of the response on assignment
				tabulate outcome
				svy: oprobit outcome treat district#stratum
				test treat

				* LATE via control function, step 1: linear first stage for training
				* receipt on observations with a non-missing outcome; keep the residual
				svy: regress treated treat district#stratum if outcome != .
				predict tchrtrtresid, residuals
				test treat

				* LATE via control function, step 2: ordered probit with the residual added
				svy: oprobit outcome treated tchrtrtresid district#stratum  // residual was insignificant
				test treated       // p-value reported in Table S5
				test tchrtrtresid
				drop tchrtrtresid

				* Write the p-value and a label into the next free row of p1/p2/row_spec.
				* NOTE(review): treat is not a regressor in the second stage that is
				* in memory at this point - confirm which coefficient the test picks up.
				local ++j
				test treat
				replace p1 = r(p) if _n == `j'
				replace p2 = r(p) if _n == `j'  // no adjustment applied, so p2 duplicates p1
				replace row_spec = "stu sci questions" if _n == `j'
				
			* SCIENCE TEACHER: HOW OFTEN GROUP STUDY IS REQUIRED (student report)
			* Load the item into the generic working variable used by the commands below.
				capture drop outcome
				generate outcome = sc_groupstudy
				tabulate outcome

				* Design-based response proportions by study arm
				svy, over(treat): proportion outcome if treated != ., cformat(%9.3g)

				* Intent-to-treat: ordered probit of the response on assignment
				tabulate outcome
				svy: oprobit outcome treat district#stratum
				test treat

				* LATE via control function, step 1: linear first stage for training
				* receipt on observations with a non-missing outcome; keep the residual
				svy: regress treated treat district#stratum if outcome != .
				predict tchrtrtresid, residuals
				test treat

				* LATE via control function, step 2: ordered probit with the residual added
				svy: oprobit outcome treated tchrtrtresid district#stratum  // residual was insignificant
				test treated       // p-value reported in Table S5
				test tchrtrtresid
				drop tchrtrtresid

				* Write the p-value and a label into the next free row of p1/p2/row_spec.
				* NOTE(review): treat is not a regressor in the second stage that is
				* in memory at this point - confirm which coefficient the test picks up.
				local ++j
				test treat
				replace p1 = r(p) if _n == `j'
				replace p2 = r(p) if _n == `j'  // no adjustment applied, so p2 duplicates p1
				replace row_spec = "stu sci group work" if _n == `j'
				
			
			* SCIENCE TEACHER: HOW OFTEN THE TEACHER DEMONSTRATES WITH OBJECTS (student report)
			* Load the item into the generic working variable used by the commands below.
				capture drop outcome
				generate outcome = sc_demostrations1
				tabulate outcome

				* Design-based response proportions by study arm
				svy, over(treat): proportion outcome if treated != ., cformat(%9.3g)

				* Intent-to-treat: ordered probit of the response on assignment
				tabulate outcome
				svy: oprobit outcome treat district#stratum
				test treat

				* LATE via control function, step 1: linear first stage for training
				* receipt on observations with a non-missing outcome; keep the residual
				svy: regress treated treat district#stratum if outcome != .
				predict tchrtrtresid, residuals
				test treat

				* LATE via control function, step 2: ordered probit with the residual added
				svy: oprobit outcome treated tchrtrtresid district#stratum  // residual was insignificant
				test treated       // p-value reported in Table S5
				test tchrtrtresid
				drop tchrtrtresid

				* Write the p-value and a label into the next free row of p1/p2/row_spec.
				* NOTE(review): treat is not a regressor in the second stage that is
				* in memory at this point - confirm which coefficient the test picks up.
				local ++j
				test treat
				replace p1 = r(p) if _n == `j'
				replace p2 = r(p) if _n == `j'  // no adjustment applied, so p2 duplicates p1
				replace row_spec = "stu sci demonstrations1" if _n == `j'
				
				
			* FREQUENCY THAT SCIENCE TEACHER DEMONSTRATES WITH DIAGRAMS....
			* Student-reported item sc_demostrations2, analysed as an ordered response:
			* proportions by arm, ITT ordered probit, and a control-function LATE
			* (linear first stage, residual added to the ordered probit).
				capture drop outcome
				gen outcome=sc_demostrations2
				tab outcome
																		
				* sample proportions
				svy, over(treat): proportion outcome if treated~=., cformat(%9.3g)  
				
				* ITT 
				tab outcome
				svy: oprobit outcome treat district#stratum
				test treat
								
				* LATE 
				* first stage and create residual (restricted to obs with a non-missing outcome)
				svy: reg treated treat district#stratum if outcome~=. 
				predict tchrtrtresid, r
				test treat  
				
				* second stage 
				svy: oprobit outcome treated tchrtrtresid district#stratum // resid insignificant (p-value = .132)
				test treated // p-value for table S5
				test tchrtrtresid  
				drop tchrtrtresid 	
				
				* store p-value
				* NOTE(review): the model in memory here is the second stage, which has
				* treated (not treat) as a regressor - confirm which coefficient this test uses.
				test treat 
				local j = `j' + 1
				replace p1= r(p) if _n==`j'
				replace p2= r(p) if _n==`j'  // for ITT adjusted and unadjusted p values are the same
				* Label corrected: this is the SCIENCE table, but the label had been
				* copied from the math section ("stu math demonstrations2").
				replace row_spec = "stu sci demonstrations2" if _n==`j'
					
		* Reduce the data to the seven rows that hold collected p-values and their
		* labels, and park them in a temporary file.
				tempfile table_A5
				keep p1 p2 row_spec
				drop if _n > 7
				save "`table_A5'", replace
				clear

	* MULTIPLE HYPOTHESIS TESTING CORRECTIONS FOR TABLE S5

				* Reload the small p-value file and show it for checking.
				* No LATE p-value needed adjustment here, so p2 is not used below.
				use "`table_A5'", clear
				list

				* q-values from qqvalue with method(yekutieli), stored in q1.
				* Nothing was significant before correction, so these are a formality.
				qqvalue p1, method(yekutieli) qvalue(q1)
				list row_spec p1 q1

********************************************************************************

****LATE IMPACTS ON TEACHING PRACTICES BY HEAD TEACHER REPORT (TABLE 5)

********************************************************************************
	
	* HEAD TEACHER (HT) QUESTIONNAIRE DATA: ONE RECORD PER SECONDARY MATH/SCIENCE TEACHER REPORTED ON

		* Load the file, prefix value labels with their numeric codes, and take a
		* first look at subject category and the t_ssdp variable.
		tempfile htreports
		use HTteacher_c, clear
		numlabel, add
		tabulate t_subjectcat, missing
		tabulate t_ssdp
		tabulate t_subjectcat, summarize(t_ssdp)

		* Keep a temporary copy for the merge with the attendance records.
		save "`htreports'", replace
		
	* ADMINISTRATIVE RECORDS OF TEACHER ATTENDANCE AT SSDP TRAININGS

		* Load attendance records; records whose teacherid is the string "."
		* cannot be matched (23 such teachers per the original run) and are removed.
		use SSDP_VA_attendance, clear
		count if teacherid == "."
		drop if teacherid == "."

		* Jumla (district 63): blank out recorded days for teachers flagged as trained
		replace ssdp_math_days = . if district == 63 & ssdp_math == 1
		replace ssdp_sci_days  = . if district == 63 & ssdp_sci == 1

		* Math training indicator: 1 when recorded days are 6 or 10, or when the
		* teacher is flagged as trained but days are missing; 0 otherwise.
		* NOTE(review): the original note described this as "6 or more days"; the
		* rule below matches only the values 6 and 10 - confirm no other values occur.
		tabulate ssdp_math_days ssdp_math, missing
		generate ssdp_m_t = inlist(ssdp_math_days, 6, 10)
		replace ssdp_m_t = 1 if ssdp_math == 1 & ssdp_math_days == .
		label variable ssdp_m_t "Math teacher had SSDP training"
		tabulate ssdp_m_t

		* Science training indicator: 1 when recorded days are 9 or 10, or when the
		* teacher is flagged as trained but days are missing; 0 otherwise.
		tabulate ssdp_sci_days ssdp_sci, missing
		generate ssdp_s_t = inlist(ssdp_sci_days, 9, 10)
		replace ssdp_s_t = 1 if ssdp_sci == 1 & ssdp_sci_days == .
		label variable ssdp_s_t "Science teacher had SSDP training"
		tabulate ssdp_s_t

		* Check that teacherid identifies records (600 teachers and records in the original run)
		unique teacherid

		* Temporary copy used as the "using" file in the merge that follows
		tempfile admindata
		save "`admindata'", replace
		
		* Combine HT reports with attendance records, one row per teacherid, and
		* keep matched teachers only (original run: 1 master-only, 122 using-only).
		use "`htreports'", clear
		merge 1:1 teacherid using "`admindata'"
		drop if _merge != 3
		drop _merge

		* Subject-specific treatment indicator from admin data:
		*   t_subjectcat 1 -> math training, 2 -> science training, 3 -> either one
		generate admin_treated = (ssdp_m_t == 1) if t_subjectcat == 1 & ssdp_m_t != .
		replace admin_treated = (ssdp_s_t == 1) if t_subjectcat == 2 & ssdp_s_t != .
		replace admin_treated = (ssdp_m_t == 1 | ssdp_s_t == 1) if t_subjectcat == 3
		replace admin_treated = . if ssdp_m_t == . & ssdp_s_t == .  // 0 real changes in the original run

		tabulate admin_treated

		* Remove teachers with no treatment status (none in the original run)
		tabulate admin_treated, missing
		drop if admin_treated == .


		* Alternative indicator: any teacher with either training, regardless of subject
		generate admin_treated2 = (ssdp_m_t == 1 | ssdp_s_t == 1) if ssdp_m_t != . & ssdp_s_t != .

		* Compare the two definitions
		tabulate admin_treated admin_treated2, missing
		correlate admin_treated admin_treated2
		tabulate t_subjectcat admin_treated
		tabulate t_subjectcat admin_treated2
		generate diff = (admin_treated != admin_treated2) if admin_treated != . & admin_treated2 != .
		tabulate diff  // original run: the broader definition changes only 12 obs (2.5 percent of teachers)
		* admin_treated is the one used below, for consistency with how treated status is defined elsewhere in the paper
	
				
	* ADD SCHOOL-LEVEL STUDY ARM AND SURVEY DESIGN VARIABLES

		* Many teachers per school, one school record each (original run: all 478 match)
		sort schoolid
		merge m:1 schoolid using "`sch_temp'"

		* Declare the survey design used by every svy: command below
		svyset schoolid, weight(sch_wght) strata(dist_stratum)

		* Training take-up by study arm among single-subject teachers
		svy, over(treat): mean ssdp_m_t if t_subjectcat == 1   // original run: 63 percent of treatment math teachers treated
		svy, over(treat): mean ssdp_s_t if t_subjectcat == 2   // original run: 42 percent of treatment science teachers treated
			
				
	* INSPECT OUTCOME DISTRIBUTIONS; COLLAPSE ANY CATEGORY HOLDING LESS THAN 5%

		* Creates teaching materials from local sources
		* Relevant responses: No (0); Yes (1)
		tabulate t_materials, missing

		* Frequency of using teaching materials or visual aids
		* Relevant responses: Never (1); Sometimes, less than once per week (2); Often, one or more times per week (3)
		tabulate t_concepts, missing

		* Collects information (e.g. interest rates) from local residents
		* Relevant responses: No (0); Yes (1)
		tabulate t_projects, missing

		* Frequency of requiring students to work in small groups
		* Relevant responses: Never (1); Sometimes, less than once per week (2); Often, one or more times per week (3)
		tabulate t_groups, missing

		* Frequency of requiring students to work on long-term projects.
		* The small category 3 is folded into category 2, then all codes are shifted
		* down by one, giving: No project work (0); A little or substantial project
		* work (1) (most are "little").
		tabulate t_longprojects, missing
		replace t_longprojects = 2 if t_longprojects == 3
		replace t_longprojects = t_longprojects - 1
		tabulate t_longprojects, missing
	
		
	* CONTAINERS FOR THE P-VALUES USED IN THE MULTIPLE HYPOTHESIS TESTING CORRECTIONS
		* p1 and p2 receive one p-value per outcome (row j); row_spec labels the row.
		foreach pv in p1 p2 {
			generate `pv' = .
		}
		generate str20 row_spec = "to be filled"
		local j = 0

	* TABLE 5: CONTROL AND TREATMENT PERCENTAGES AND LATE IMPACT ESTIMATES (ITT IS ALSO RUN BUT NOT TABULATED)
		* Teachers of math, science, or both are pooled in these estimates.

		* Treatment receipt is the subject-specific admin indicator; teachers
		* without a value for it are removed.
		generate treated = admin_treated

		drop if treated == .
		
		
		* TEACHER EVER CREATES TEACHING MATERIALS FROM LOCAL RESOURCES (t_materials)
		* Binary outcome. Steps: proportions by arm, ITT by design-based linear
		* regression, LATE by linear 2SLS with treat as the instrument for treated,
		* then the tFcorrections program (defined outside this section) for the
		* Lee et al. (2022) adjustments. One unadjusted and one adjusted p-value
		* are written to row j of p1/p2.
		
			svy, over(treat): proportion t_materials if treated~=. // 1. Yes 0 and 1.Yes 1 entries in table 5
		
			svy: reg t_materials treat district#stratum  // ITT 
		
			* Because this outcome is dichotomous, use LPM in both stages and use Lee et al. 2022 correction
						
			* CORRECTION OF STANDARD ERRORS ETC FOR LATE FOLLOWING LEE ET AL 2022  
		
				* For regressions requiring sample selection, define in_sample
					capture drop in_sample
					gen in_sample=  t_materials~=.
									
				* first stage reg
					svy: reg treated treat district#stratum if in_sample==1		
					
					
				* grab F stat from first stage for use in calculating correction to second stage standard errors
					test treat 
					scalar FF=r(F)  // This is the F stat that is the basis for Lee et al. 2022 corrections of second stage standard errors
					di FF // 249.47  - this is high enough that corrections aren't necessary (but correction code is here anyway)
					
													
				* unadjusted second stage estimation 
					* Guard added so this block matches the later Table 5 blocks and can be
					* re-run when a variable named score is already in memory.
					capture drop score
					gen score=t_materials   // giving the name score to use tF correction program from Table 2 do-file
					
					* Do not edit the text of the next command casually: its e(cmdline)
					* is captured into the global reg_command below.
					svy: ivregress 2sls score district#stratum (treated = treat) if in_sample==1
						
					estimates store table5_row1
					scalar rsquared=e(r2)	
									
				* run program for adjustments 
				* The scalars and the global set here are presumably the inputs that
				* tFcorrections reads - confirm against the program definition.
					capture drop testscore
					gen testscore = t_materials
					global reg_command "`e(cmdline)'"
						
					scalar F5_below = 	104.67  //  F in table 3a just below FF 
					scalar F5_above = 	104.67  // F in table 3a just above FF
					scalar A5_below = 	1.000 // adjustment factor in 3a for F5below
					scalar A5_above = 	1.000  // adjustment factor in 3a for F5above
					scalar F1_below = 	174.370	// F in table 3b just below FF
					scalar F1_above = 	252.342		// F in table 3b just above FF
					scalar A1_below =	1.097	// adjustment factor in 3b for F1below
					scalar A1_above =	1.059	// adjustment factor in 3b for F1above
													
					tFcorrections // running program
					
					* p_noadj and p_adj are not set in this section; they are expected to
					* be left behind by tFcorrections.
					local j=`j' + 1
					replace p1 = p_noadj if _n==`j'
					replace p2 = p_adj if _n==`j'
					replace row_spec = "make from local resources" if _n==`j'
																		
			* show results for table 5 row 1	
			
					di "OUTCOME VAR IS:  " row_spec[`j']
					etable, column(estimates) estimates(table5_row1) showstars showstarsnote ///
					keep(treat inter treated interlate) cstat(_r_b) cstat(_r_se) cstat(_r_ci) ///
					mstat(N) mstat(r2) stars(.10 "*" .05 "**" .01 "***", attach(_r_b))  
					di "r-squared = " rsquared  // less rounded
					
					print_res  // display adjusted results
					
					
						
		* HOW OFTEN THE TEACHER USES TEACHING MATERIALS OR VISUAL AIDS (t_concepts)
		* Ordered outcome with more than two categories, so the LATE comes from an
		* ordered probit with a control function rather than from IV.

			* Design-based response proportions by study arm
			svy, over(treat): proportion t_concepts

			* ITT: ordered probit on assignment
			svy: oprobit t_concepts treat district#stratum

			* Control function, step 1: linear first stage on observations with a
			* non-missing outcome; keep its residual
			svy: regress treated treat district#stratum if t_concepts != .
			predict firststageresid, residuals

			* Control function, step 2: ordered probit with the residual included
			* (the first-stage residual was insignificant in the original run)
			svy: oprobit t_concepts treated firststageresid district#stratum
			estimates store controlfunction

			test firststageresid  // original run: p = .492
			test treated          // original run: p = .074

			* r(p) still holds the result of the test on treated; store it in row j.
			* No adjustment is applied, so p1 and p2 receive the same value.
			local ++j
			replace p1 = r(p) if _n == `j'
			replace p2 = r(p) if _n == `j'
			replace row_spec = "materials visual aids" if _n == `j'

			* Marginal effects of treated, evaluated at treated = 0
			margins, dydx(treated) at(treated=0)

			drop firststageresid
				
							
		* TEACHER EVER COLLECTS, OR HAS STUDENTS COLLECT, LOCAL INFORMATION (t_projects)
		* Binary outcome: linear probability models in both stages, with the
		* Lee et al. (2022) correction applied to the LATE.

			* Proportions by study arm (the 1.Yes rows for treat 0 and 1 are the Table 5 entries)
			svy, over(treat): proportion t_projects

			* ITT
			svy: regress t_projects treat district#stratum

			* LATE WITH LEE ET AL. (2022) CORRECTION OF STANDARD ERRORS

				* Flag the estimation sample shared by both stages
					capture drop in_sample
					generate in_sample = (t_projects != .)

				* First stage, and the F statistic on the instrument that drives the correction
					svy: regress treated treat district#stratum if in_sample == 1
					test treat
					scalar FF = r(F)
					display FF  // original run: 246.7, high enough that corrections are not needed (code kept anyway)

				* Unadjusted second stage. The outcome is copied to score because the
				* tF correction program from the Table 2 do-file uses that name.
					capture drop score
					generate score = t_projects

					* The text of this command is captured through e(cmdline) below.
					svy: ivregress 2sls score district#stratum (treated = treat) if in_sample==1

					global reg_command "`e(cmdline)'"
					estimates store table5_row3
					scalar rsquared = e(r2)

				* Inputs for the adjustment program, then run it
					capture drop testscore
					generate testscore = t_projects

					scalar F5_below = 104.67    // F in table 3a just below FF
					scalar F5_above = 104.67    // F in table 3a just above FF
					scalar A5_below = 1.000     // adjustment factor in 3a for F5below
					scalar A5_above = 1.000     // adjustment factor in 3a for F5above
					scalar F1_below = 174.370   // F in table 3b just below FF
					scalar F1_above = 252.342   // F in table 3b just above FF
					scalar A1_below = 1.097     // adjustment factor in 3b for F1below
					scalar A1_above = 1.059     // adjustment factor in 3b for F1above

					tFcorrections

				* Store unadjusted and adjusted p-values in row j
					local ++j
					replace p1 = p_noadj if _n == `j'
					replace p2 = p_adj if _n == `j'
					replace row_spec = "local information" if _n == `j'

			* Results for Table 5, row 3

					display "OUTCOME VAR IS:  " row_spec[`j']
					etable, column(estimates) estimates(table5_row3) showstars showstarsnote ///
						keep(treat inter treated interlate) ///
						cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) mstat(r2) ///
						stars(.10 "*" .05 "**" .01 "***", attach(_r_b))
					display "r-squared = " rsquared  // etable rounds this; shown again with less rounding

					print_res  // adjusted results
				
					
									

		*TEACHER'S FREQUENCY OF REQUIRING STUDENTS TO WORK IN SMALL GROUPS 
		* Ordered outcome (t_groups): proportions by arm, ITT ordered probit, and a
		* control-function LATE. Because the first-stage residual is borderline
		* significant, a full maximum likelihood eoprobit is run as a check.
		* The p-value stored for the multiple-testing step is the control-function
		* test on treated, the same choice as for the t_concepts outcome.
				
			svy, over(treat): proportion t_groups  if treated~=.
					
			svy: oprobit t_groups treat district#stratum  // ITT 
		
			
			* late 
			* Because this outcome is polychotomous, we use ordered probit, and we use control function method rather than IV 
			svy: reg treated treat district#stratum if t_groups~=.
			predict firststageresid, resid
			
			svy: oprobit t_groups treated firststageresid district#stratum 
							
			test firststageresid  // p= .078  signif at 10 percent level
			test treated 
			
			* Fix: save the p-value now. The margins call below is r-class and
			* replaces the contents of r(), so reading r(p) after it (as this block
			* used to) does not return the p-value from the test on treated.
			* NOTE(review): if the eoprobit p-value is preferred for the correction,
			* run test treated after eoprobit and store that r(p) instead.
			tempname p_groups
			scalar `p_groups' = r(p)
			
			margins, dydx(treated) at(treated=0) /* marginal effects */	
			
			* since firststage resid borderline signif, try full maximum likelihood (treating instrument as continuous, consistent with LPM)
			svy: eoprobit t_groups district#stratum, endogenous(treated = district#stratum treat)  // estimates similar to control function estimates
			
			* try with first stage probit
			*svy: eoprobit t_groups district#stratum, endogenous(treated = district#stratum treat, probit)  // does not succeed
			
			local j=`j' + 1
			replace p1 = `p_groups' if _n==`j'
			replace p2 = `p_groups' if _n==`j'
			replace row_spec = "frequency of requiring groups" if _n==`j'
			
			* marginal effects again, this time from the eoprobit estimates in memory
			margins, dydx(treated) at(treated=0) /* marginal effects */	
					
			drop firststageresid 

					
		* TEACHER EVER REQUIRES STUDENTS TO WORK ON LONGER-TERM PROJECTS (t_longprojects)
		* Binary outcome (after the recode earlier in the file): linear probability
		* models in both stages, with the Lee et al. (2022) correction applied to the LATE.

			* Proportions by study arm (the 1.Yes rows for treat 0 and 1 are the Table 5 entries)
			svy, over(treat): proportion t_longprojects

			* ITT
			svy: regress t_longprojects treat district#stratum

			* LATE WITH LEE ET AL. (2022) CORRECTION OF STANDARD ERRORS

				* Flag the estimation sample shared by both stages
					capture drop in_sample
					generate in_sample = (t_longprojects != .)

				* First stage, and the F statistic on the instrument that drives the correction
					svy: regress treated treat district#stratum if in_sample == 1
					test treat
					scalar FF = r(F)
					display FF  // original run: 251.496, high enough that corrections are not needed (code kept anyway)

				* Unadjusted second stage. The outcome is copied to score because the
				* tF correction program from the Table 2 do-file uses that name.
					capture drop score
					generate score = t_longprojects

					* The text of this command is captured through e(cmdline) below.
					svy: ivregress 2sls score district#stratum (treated = treat) if in_sample==1

					global reg_command "`e(cmdline)'"
					estimates store table5_row5
					scalar rsquared = e(r2)

				* Inputs for the adjustment program, then run it
					capture drop testscore
					generate testscore = t_longprojects

					scalar F5_below = 104.67    // F in table 3a just below FF
					scalar F5_above = 104.67    // F in table 3a just above FF
					scalar A5_below = 1.000     // adjustment factor in 3a for F5below
					scalar A5_above = 1.000     // adjustment factor in 3a for F5above
					scalar F1_below = 174.370   // F in table 3b just below FF
					scalar F1_above = 252.342   // F in table 3b just above FF
					scalar A1_below = 1.097     // adjustment factor in 3b for F1below
					scalar A1_above = 1.059     // adjustment factor in 3b for F1above

					tFcorrections

				* Store unadjusted and adjusted p-values in row j
					local ++j
					replace p1 = p_noadj if _n == `j'
					replace p2 = p_adj if _n == `j'
					replace row_spec = "long projects" if _n == `j'

			* Results for Table 5, row 5 (estimates stored as table5_row5)

					display "OUTCOME VAR IS:  " row_spec[`j']
					etable, column(estimates) estimates(table5_row5) showstars showstarsnote ///
						keep(treat inter treated interlate) ///
						cstat(_r_b) cstat(_r_se) cstat(_r_ci) mstat(N) mstat(r2) ///
						stars(.10 "*" .05 "**" .01 "***", attach(_r_b))
					display "r-squared = " rsquared  // etable rounds this; shown again with less rounding

					print_res  // adjusted results
					
					

		* MULTIPLE HYPOTHESIS TESTING CORRECTIONS FOR TABLE 5

			* Reduce the data to the five rows holding the collected p-values.
			* In the original run no p2 differed from its p1 (all first-stage F
			* statistics were high), so only p1 is corrected.
			keep p1 p2 row_spec
			drop if _n > 5
			list

			* q-values from qqvalue with method(yekutieli), stored in q1
			qqvalue p1, method(yekutieli) qvalue(q1)
			list row_spec p1 q1

log close